Importing Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
Reading CSV file
data=pd.read_csv("Unemployment_Rate_upto_11_2020.csv")
Describing Data
print(data.head())
Region Date Frequency Estimated Unemployment Rate (%) \
0 Andhra Pradesh 31-01-2020 M 5.48
1 Andhra Pradesh 29-02-2020 M 5.83
2 Andhra Pradesh 31-03-2020 M 5.79
3 Andhra Pradesh 30-04-2020 M 20.51
4 Andhra Pradesh 31-05-2020 M 17.43
Estimated Employed Estimated Labour Participation Rate (%) Region.1 \
0 16635535 41.02 South
1 16545652 40.90 South
2 15881197 39.18 South
3 11336911 33.10 South
4 12988845 36.46 South
longitude latitude
0 15.9129 79.74
1 15.9129 79.74
2 15.9129 79.74
3 15.9129 79.74
4 15.9129 79.74
data.info
<bound method DataFrame.info of Region Date Frequency Estimated Unemployment Rate (%) \
0 Andhra Pradesh 31-01-2020 M 5.48
1 Andhra Pradesh 29-02-2020 M 5.83
2 Andhra Pradesh 31-03-2020 M 5.79
3 Andhra Pradesh 30-04-2020 M 20.51
4 Andhra Pradesh 31-05-2020 M 17.43
.. ... ... ... ...
262 West Bengal 30-06-2020 M 7.29
263 West Bengal 31-07-2020 M 6.83
264 West Bengal 31-08-2020 M 14.87
265 West Bengal 30-09-2020 M 9.35
266 West Bengal 31-10-2020 M 9.98
Estimated Employed Estimated Labour Participation Rate (%) Region.1 \
0 16635535 41.02 South
1 16545652 40.90 South
2 15881197 39.18 South
3 11336911 33.10 South
4 12988845 36.46 South
.. ... ... ...
262 30726310 40.39 East
263 35372506 46.17 East
264 33298644 47.48 East
265 35707239 47.73 East
266 33962549 45.63 East
longitude latitude
0 15.9129 79.740
1 15.9129 79.740
2 15.9129 79.740
3 15.9129 79.740
4 15.9129 79.740
.. ... ...
262 22.9868 87.855
263 22.9868 87.855
264 22.9868 87.855
265 22.9868 87.855
266 22.9868 87.855
[267 rows x 9 columns]>
print(data.describe)
<bound method NDFrame.describe of Region Date Frequency Estimated Unemployment Rate (%) \
0 Andhra Pradesh 31-01-2020 M 5.48
1 Andhra Pradesh 29-02-2020 M 5.83
2 Andhra Pradesh 31-03-2020 M 5.79
3 Andhra Pradesh 30-04-2020 M 20.51
4 Andhra Pradesh 31-05-2020 M 17.43
.. ... ... ... ...
262 West Bengal 30-06-2020 M 7.29
263 West Bengal 31-07-2020 M 6.83
264 West Bengal 31-08-2020 M 14.87
265 West Bengal 30-09-2020 M 9.35
266 West Bengal 31-10-2020 M 9.98
Estimated Employed Estimated Labour Participation Rate (%) Region.1 \
0 16635535 41.02 South
1 16545652 40.90 South
2 15881197 39.18 South
3 11336911 33.10 South
4 12988845 36.46 South
.. ... ... ...
262 30726310 40.39 East
263 35372506 46.17 East
264 33298644 47.48 East
265 35707239 47.73 East
266 33962549 45.63 East
longitude latitude
0 15.9129 79.740
1 15.9129 79.740
2 15.9129 79.740
3 15.9129 79.740
4 15.9129 79.740
.. ... ...
262 22.9868 87.855
263 22.9868 87.855
264 22.9868 87.855
265 22.9868 87.855
266 22.9868 87.855
[267 rows x 9 columns]>
see if this dataset contains missing values or not
print(data.isnull().sum())
Region 0 Date 0 Frequency 0 Estimated Unemployment Rate (%) 0 Estimated Employed 0 Estimated Labour Participation Rate (%) 0 Region.1 0 longitude 0 latitude 0 dtype: int64
rename all the columns.
data.colums =["states","Date","Frequency",
"Estimated Unemployment Rate","Estimated Employed",
"Estimated Labour Participation Rate","Region",
"longitude","latitude"]
C:\Users\pavan\AppData\Local\Temp\ipykernel_4548\1947871789.py:1: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access data.colums =["states","Date","Frequency",
print(data)
Region Date Frequency Estimated Unemployment Rate (%) \
0 Andhra Pradesh 31-01-2020 M 5.48
1 Andhra Pradesh 29-02-2020 M 5.83
2 Andhra Pradesh 31-03-2020 M 5.79
3 Andhra Pradesh 30-04-2020 M 20.51
4 Andhra Pradesh 31-05-2020 M 17.43
.. ... ... ... ...
262 West Bengal 30-06-2020 M 7.29
263 West Bengal 31-07-2020 M 6.83
264 West Bengal 31-08-2020 M 14.87
265 West Bengal 30-09-2020 M 9.35
266 West Bengal 31-10-2020 M 9.98
Estimated Employed Estimated Labour Participation Rate (%) Region.1 \
0 16635535 41.02 South
1 16545652 40.90 South
2 15881197 39.18 South
3 11336911 33.10 South
4 12988845 36.46 South
.. ... ... ...
262 30726310 40.39 East
263 35372506 46.17 East
264 33298644 47.48 East
265 35707239 47.73 East
266 33962549 45.63 East
longitude latitude
0 15.9129 79.740
1 15.9129 79.740
2 15.9129 79.740
3 15.9129 79.740
4 15.9129 79.740
.. ... ...
262 22.9868 87.855
263 22.9868 87.855
264 22.9868 87.855
265 22.9868 87.855
266 22.9868 87.855
[267 rows x 9 columns]
Look at the correlation b/w the features of this dataset.
plt.style.use("seaborn-whitegrid")
plt.figure(figsize=(12,10))
sns.heatmap(data.corr())
plt.show()
C:\Users\pavan\AppData\Local\Temp\ipykernel_4548\221186264.py:1: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead.
plt.style.use("seaborn-whitegrid")
C:\Users\pavan\AppData\Local\Temp\ipykernel_4548\221186264.py:3: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.
sns.heatmap(data.corr())
Look at the estimated number of employess according to different regions of india
data.columns =["states","Date","Frequency",
"Estimated Unemployment Rate","Estimated Employed",
"Estimated Labour Participation Rate","Region",
"longitude","latitude"]
plt.title("Indian Unemployment")
sns.histplot(x="Estimated Employed",hue="Region",data=data)
plt.show()
see the unemployment rate according to differentregion of india
plt.figure(figsize=(12,10))
plt.figure("Indian Unemployment")
sns.histplot(x="Estimated Unemployment Rate",hue="Region",data=data)
plt.show()
<Figure size 1200x1000 with 0 Axes>
create a dashboard ti analyze the unemployment rate of each Indian state by region.use sunburst plot.
unemploment = data[["states","Region","Estimated Unemployment Rate"]]
figure = px.sunburst(unemploment,path=["Region","states"],
values="Estimated Unemployment Rate",
width =700,height=700, color_continuous_scale="RdY1Gn",
title="Unemployment Rate in Indian")
figure.show()